/* (PARI-GP) R. J. Cano <remy@ula.ve>, Feb 9 2014 (updated) */

/*
 * What is this???:
 * 
 * 	Towards the end of 2007 the author observed that the matrix defined
 *  below as "sample3x3" is in fact a compact representation of a pair of
 *  functions that allow a (numerical) 3*3 determinant to be computed using
 *  "only one index", that is, with two of the indices parametrized in terms
 *  of the third.
 * 
 * 	The key fact is that the simplest kind of permutation is a
 *  commutation (an exchange of two elements). A 3*3 determinant can be
 *  reduced to 2*2 determinants, and in the 2*2 case the determinant is
 *  based upon commutations instead of general permutations.
 * 
 * 	In the mathematical methods of physics (particularly in classical
 *  mechanics) it is common practice to denote the first and second time
 *  derivatives of a dynamical quantity by writing, respectively, one or
 *  two dots above its symbol. Indeed, LaTeX provides the commands
 *  \dot{} and \ddot{} for this purpose.
 * 
 * 	Another alternative usage/interpretation for the described notation is
 *  the following one:
 * 
 * 	Given a natural number j between 1 and 3, \dot{j} denotes the first
 *  natural number between 1 and 3 found when j is excluded from their
 *  enumeration in ascending order. Likewise, \ddot{j} denotes the second
 *  number found under the same circumstances.
 * 
 * 	Example:
 * 
 * 	Assume j=2. The natural numbers between 1 and 3 in ascending order
 *  are {1,2,3}. By excluding j we get {1,3}. Then, by definition,
 *  \dot{j}=1 and \ddot{j}=3.
 * 
 * 	Such way of enumeration applied to the first three natural numbers yields
 *  the "sample3x3" vector defined initially inside this sourcecode.
 * 
 * O.k.!... & Why this curious "trick" might be important???.
 * 
 * 	In fact, it is important (or "interesting") not so much for the numerical
 *  computation of determinants as for the study of permutations and of the
 *  algorithms generating them. The same idea, generalized to more than 3
 *  naturals and properly combined with Narayana's lexical order generator
 *  algorithm for permutations, reveals a parallelization technique for that
 *  algorithm.
 * 
 * 	The reader might agree by simple inspection (of "sample3x3") that,
 *  since there are no repetitions among the numbers in each row, those
 *  arrangements belong to a special class of permutations. A tentative name
 *  for such a class might be "partition permutations of the natural numbers".
 *
 *  ("partition" in the structural sense of decomposition for the general problem
 *  using parallel programming techniques).
 * 
 * 	A "triangle" might be considered as a tblf sequence for being proposed to the
 *  OEIS composing a row-by-row flattened "triangle" as its b-file (entirely based
 *  upon such class of permutations).
 * 
 * 	(Dear SeqFan reader:) Now the problem or question is:
 * 
 * 		HOW-TO get the matrix containing such "partition permutations" there in the
 *      first N naturals???.
 * 
 * 	As an initial answer, a method is proposed here (please see below). The main
 *  motivation for sharing it with the SeqFan members/readers is to find a
 *  simpler way of generating such a matrix. Anyone interested is encouraged to
 *  contribute on this matter. E-mails to the author's address are also welcome.
 * 
 * (Note 1:  The alternative way for numerical 3*3 determinant computation offered
 *           above is also given here).
 *
 * (Note 2:  A PDF file illustrating about "the 2007 observation" long time forgotten
 *           is available at:  ).
 * 
 */

/*----------------- Note: The following four definitions are reserved Globals */

/* Positions inside one entry of sample3x3: position "row" holds the entry's own
 * number, positions "dot" and "ddot" hold the two remaining numbers (the
 * \dot{j} and \ddot{j} values described in the header comment). */
row=1;
dot=2;
ddot=3;

/* The K-th entry starts with K; from the second position onwards it lists, in
 * ascending order, the natural numbers 1..3 excluding K. */
 
sample3x3=[[1,2,3],[2,1,3],[3,1,2]]; 

/*----------------- Note: The previous four definitions are reserved Globals */

/*
 * alt33matdet(v): determinant of a 3*3 array, expanded along its first row.
 *
 * v is assumed to be a 3*3 PARI-GP "multivector", i.e. a vector of row vectors
 * such as [[a,b,c],[d,e,f],[g,h,i]], instead of an object built with
 * matrix(3,3,...).
 *
 * For each j, the global sample3x3[j] supplies the column of the first-row
 * factor (at position "row") and the two columns of the accompanying 2*2
 * minor (at positions "dot" and "ddot"). The sign of the j-th term is
 * (-1)^(j+1).
 */
alt33matdet(v)={
  my(acc=0, cols, sgn, minor);
  for(j=1,3,
    cols=sample3x3[j];
    sgn=(-1)^(j+1);
    minor=v[2][cols[dot]]*v[3][cols[ddot]]-v[2][cols[ddot]]*v[3][cols[dot]];
    acc+=sgn*v[1][cols[row]]*minor
  );
  acc
};

/* Of course, such a formula looks ugly once implemented, although it is not so
 * ugly when written by hand on paper.
 */

/*
 * 
 * The basic idea for parallelizing Narayana's algorithm consists of computing
 * the "partition permutations matrix" for the first N naturals and distributing
 * its rows, one or more per thread/core (processor).
 *
 * Each of those threads/cores (processors) runs Narayana's algorithm for only
 * (N-1)!-1 steps over its corresponding input. At the end of all these
 * executions, the N! permutations in lexical order WILL BE ready and available
 * to the main thread (or scheduler process, or ALU... whichever entity applies).
 * 
 * (This proposal was already tested to work fine by a 
 *  human by hand-pencil-paper: The same author of this code.)
 * 
 * The key property that makes everything described here possible is the fact
 * that permutations of this kind are uniformly distributed among all the N!
 * permutations when these are listed in lexical (ascending) order.
 * 
 */

/*
 * The Narayana's lexical order algorithm for permutations.
 * Implementation of "a single step".
 *
 * nextLexPermuteOneStep(v): returns the arrangement that follows v in lexical
 * (ascending) order; v itself is not modified.
 *
 * When v is already the last arrangement (no position i with v[i] < v[i+1]
 * exists) the result is 0*v, i.e. a vector of zeros with the same length as v.
 * Callers use that all-zero vector as their stop signal. An empty vector is
 * answered with an empty vector.
 */
nextLexPermuteOneStep(v)={
  my(w=v, n=#v, i, j, lo, hi, t);

  /* Pivot: the rightmost position i with w[i] < w[i+1]. The explicit i>=1
   * bound also covers the empty vector, where i starts at -1. */
  i=n-1;
  while(i>=1 && w[i+1]<=w[i], i--);
  if(i<1, return(0*w));

  /* Rightmost element strictly greater than the pivot; it exists because
   * w[i+1] > w[i]. */
  j=n;
  while(j>i && w[j]<=w[i], j--);
  t=w[i]; w[i]=w[j]; w[j]=t;

  /* Reverse the tail that follows the pivot position. */
  lo=i+1; hi=n;
  while(lo<hi,
    t=w[hi]; w[hi]=w[lo]; w[lo]=t;
    lo++; hi--
  );
  w
};

/*
 * The Narayana's lexical order algorithm for permutations.
 * Implementation of all the possible steps from a given input.
 *
 * nextLexPermuteAllSteps(v): prints v and then every arrangement obtained by
 * applying nextLexPermuteOneStep() repeatedly. The loop ends as soon as the
 * current vector equals the all-zero vector of length #v, so nothing is
 * printed when v itself is all zeros.
 */
nextLexPermuteAllSteps(v)={
  my(current=v, sentinel=vector(#v));
  while(current!=sentinel,
    print(current);
    current=nextLexPermuteOneStep(current)
  )
};


/* non-recursive definitions */

/*
 * partitionPermutationsMatrix_iter1(n): closed-form construction of the n*n
 * "partition permutations matrix".
 *
 * Entry (r,c) is:
 *   r      when c==1  (each row starts with its own row number),
 *   c      when c>r   (to the right of the diagonal the column number is kept),
 *   c-1    otherwise  (from column 2 up to the diagonal, the numbers 1..r-1).
 */
partitionPermutationsMatrix_iter1(n)={
  matrix(n,n,r,c,
    if(c==1, r,
      if(c>r, c, c-1)
    )
  )
};
/*
 * partitionPermutationsMatrix_iter2(k): builds the k*k "partition permutations
 * matrix" by filling the TRANSPOSE of the result, starting from the identity,
 * and transposing at the end.
 *
 * In the working matrix T, for every c from 2 to k:
 *   - the diagonal entry and everything to its right in row c become c-1,
 *   - everything to the left of the diagonal in row c becomes c,
 *   - the entry T[1,c] becomes c.
 */
partitionPermutationsMatrix_iter2(k)={
  my(T=matid(k), d);
  for(c=2,k,
    d=c-1;
    T[c,c]=d;
    for(y=c+1,k, T[c,y]=d);
    for(y=1,c-1, T[c,y]=d+1);
    T[1,c]=d+1
  );
  T~
}

/* recursive definition */

/*
 * partitionPermutationsMatrix_recur(n): builds the n*n "partition permutations
 * matrix" from the (n-1)*(n-1) one.
 *
 *   - The upper-left (n-1)*(n-1) corner is the previous matrix.
 *   - The last column of the first n-1 rows is filled with n.
 *   - The last row is n followed by 1,2,...,n-1.
 *
 * Base case: any n<=1 is answered directly with matrix(n,n,i,j,1), which is
 * [1] for n=1 and the empty matrix for n=0. This stops the recursion for
 * n<1 instead of descending without end.
 */
partitionPermutationsMatrix_recur(n)={
  my(prev, M);
  if(n<=1, return(matrix(n,n,i,j,1)));
  prev=partitionPermutationsMatrix_recur(n-1);
  M=matrix(n,n);
  for(i=1,n-1,
    for(j=1,n-1, M[i,j]=prev[i,j]);
    M[i,n]=n
  );
  M[n,1]=n;
  for(j=2,n, M[n,j]=j-1);
  M
};

/*
 * partitionPermutationsMatrix_sel(n): the construction actually used by the
 * dependent computations below. It simply forwards to one of the iterative or
 * recursive versions; edit the call inside to switch between them and compare
 * the three alternatives.
 */
partitionPermutationsMatrix_sel(n)={
  partitionPermutationsMatrix_iter1(n)
};

/*
 * Just an aid.
 *
 * rowExtraction(n,m): the m-th row of the n*n matrix returned by
 * partitionPermutationsMatrix_sel(n), as a row vector.
 *
 * When m is outside 1..n the result is vector(n), a vector of n zeros. The
 * lower bound is checked explicitly so that m<1 gets the same answer as m>n
 * rather than an out-of-range index.
 */
rowExtraction(n,m)={
  if(m<1 || m>n, return(vector(n)));
  partitionPermutationsMatrix_sel(n)[m,]
};

/*
 * Just another aid, illustrating the distribution scheme described previously.
 *
 * showSteps(n,m): prints the m-th block (1<=m<=n) of permutations of 1..n.
 * The block starts at rowExtraction(n,m), which is printed first, and is
 * followed by (n-1)!-1 further applications of nextLexPermuteOneStep(), each
 * of them printed; (n-1)! lines in total.
 *
 * Any m outside 1..n only prints an error message. The lower bound is checked
 * here so that m<1 is reported in the same way as m>n.
 */
showSteps(n,m)={
  my(cur, blockSize);
  if(m>=1 && m<=n,
    blockSize=(n-1)!;
    cur=rowExtraction(n,m);
    print(cur);
    /* The starting row counts as element 1 of the block. */
    for(s=2,blockSize,
      cur=nextLexPermuteOneStep(cur);
      print(cur)
    )
  ,
    print("Error!. Invalid call to showSteps(n,m). It should be 1<=m<=n.")
  )
};

/* =========================================== =========================================== =========================================== */

/*-------------------------------------------------------------------------------------------------- 
  --------[EXAMPLE BEGINS HERE] -------------------------------------------------------------------- */

print("\n\tDear reader. Welcome!\n\n");

/* Test #1: compare the built-in determinant with alt33matdet() on a fully
 * symbolic 3*3 array (A11..A33 are free variables). */

w=[[A11,A12,A13],[A21,A22,A23],[A31,A32,A33]];
m=matrix(3,3,i,j,w[i][j]);
d0=matdet(m);
d1=alt33matdet(w);
/* The pieces of the message are passed to print() as comma-separated
 * arguments; legacy concatenation by juxtaposition is avoided for portability
 * across GP versions. The printed text is unchanged. */
print("\tTest #1:\n\n\t\t(For 3*3 matrices) The built-in determinant routine matdet()\n\t\tand the alternative proposal alt33matdet() return",if(!(d0-d1)," identical "," different "),"results.\n");

C=3; /* 3 for readability, but the same conclusion is obtained for any greater C */

/* Test #2, part 1: every permutation of [1..C], generated in one run. */
print("\tTest #2:\n\n\t(Part 1) Example of the Narayna's algorithm: Permutations for [1,2,3...N]; \n");
nextLexPermuteAllSteps(vector(C,j,j));
print("");

/* Test #2, part 2: the same list, produced block by block. */
print("\t(Part 2) Example of the distribution scheme: Concatenation of showSteps(N,j) for j=1,2,3...N\n");
for(j=1,C,showSteps(C,j));
print("");

/* Final message */
print("\tAs you might verify it by inspection, both results are identical.");
print("\tAlso you can try the output files generated by sampleFilesForComparison();\n");

/* Ends the gp session here: nothing after this line is executed. */
quit();

/*--------[EXAMPLE ENDS HERE] --------------------------------------------------------------------
  ------------------------------------------------------------------------------------------------ */

/* -------------------------------------------------------------------------------------------
 * For further study.
 * -------------------------------------------------------------------------------------------
 * Test #3. Please comment out, delete, or carefully move the preceding EXAMPLE
 * section first: it ends with quit(), so nothing below it runs otherwise.
 * 
 * This time a pair of plain-text files containing similar outputs to the preceding example
 * will be generated. One of them (called "Data_A") for the Narayana's algorithm execution
 * without decomposition in blocks. The other (called "Data_B") apparently with the same
 * output but obtained by applying the distribution scheme.
 * 
 * The purpose is that the reader/user compare the checksums for both files.
 * These must match.
 * 
 * Suggested sequence of commands (Linux):
 * 
 * 1) rm -f Data_*;
 * 2) gp -q -f -s 1024000000 ./partitionPermutations.txt;
 * 3) md5sum Data_*;
 * 
 * Note:
 *
 * 	For a given C >= 1 (see below), this final test consists of the concatenation in
 * 	each file by using the corresponding method, of all the cases between 1 and C.
 * 
 * ------------------------------------------------------------------------------------------- */

/*
 * nextLexPermuteAllSteps_disk(v): same walk as nextLexPermuteAllSteps(), but
 * each arrangement is appended to the file "Data_A" with write() instead of
 * being printed. The loop ends when the current vector equals the all-zero
 * vector of length #v.
 */
nextLexPermuteAllSteps_disk(v)={
  my(current=v, sentinel=vector(#v));
  while(current!=sentinel,
    write("Data_A",current);
    current=nextLexPermuteOneStep(current)
  )
};

/*
 * showSteps_disk(n,m): same block generation as showSteps(), but each line is
 * appended to the file "Data_B" with write() instead of being printed.
 *
 * The block starts at rowExtraction(n,m) and is followed by (n-1)!-1 further
 * applications of nextLexPermuteOneStep(); (n-1)! lines in total.
 *
 * Any m outside 1..n prints an error message and then quits gp. The lower
 * bound is checked here so that m<1 is reported in the same way as m>n, and
 * the message names this function.
 */
showSteps_disk(n,m)={
  my(cur, blockSize);
  if(m>=1 && m<=n,
    blockSize=(n-1)!;
    cur=rowExtraction(n,m);
    write("Data_B",cur);
    /* The starting row counts as element 1 of the block. */
    for(s=2,blockSize,
      cur=nextLexPermuteOneStep(cur);
      write("Data_B",cur)
    )
  ,
    print("Error!. Invalid call to showSteps_disk(n,m). It should be 1<=m<=n. Please correct and try again.");
    quit()
  )
};

/*
 * sampleFilesForComparison(n): appends the permutations of [1..n] to both
 * comparison files. "Data_A" receives them from a single run starting at
 * [1,2,...,n]; "Data_B" receives them as the concatenation of the blocks
 * k=1..n written by showSteps_disk(n,k).
 */
sampleFilesForComparison(n)={
  my(start=vector(n,idx,idx));
  nextLexPermuteAllSteps_disk(start);
  for(blk=1,n,
    showSteps_disk(n,blk)
  )
}

/* Largest size included in the comparison: sizes 1..C are all appended to the
 * same pair of files. */
C=10;

print1("\n\t\tPlease wait while the files \"Data_A\" and \"Data_B\" are generated... ");
for(size=1,C,
  sampleFilesForComparison(size)
);
print("Done!.\n");

/* Ends the gp session. */
quit();

/*
 * The expected answer (for C=10) is:
 * 
 * 806f4ed2d6e02c9f9ee16aaa67cac030  Data_A
 * 806f4ed2d6e02c9f9ee16aaa67cac030  Data_B
 * 
 * Or at least this common checksum was obtained
 * by the author on his own 32-bit machine.
 * 
 * Of course, the relevant fact here is that for
 * every feasible value in C inside this particular
 * test both checksums should match.
 * 
 */

/*EOF*/